# Functions
threshold <- function(x, condition = 0, comparator = "<") {

        if (comparator == ">") {
            ifelse(x > condition, return(1), return(0))
        } else if (comparator == ">=") {
            ifelse(x >= condition, return(1), return(0))
        } else if (comparator == "<=") {
            ifelse(x <= condition, return(1), return(0))
        } else if (comparator == "<") {
            ifelse(x < condition, return(1), return(0))
        } else if (comparator == "==") {
            ifelse(x == condition, return(1), return(0))
        } else if (comparator == "!=") {
            ifelse(x != condition, return(1), return(0))
        } else {
            return(FALSE)
        }
}

threshold_less_than_five <- function(x) {
    return(threshold(x, condition = 5, comparator = "<"))
}

threshold_less_than_zero <- function(x) {
    return(threshold(x, condition = 0, comparator = "<"))
}

threshold_greater_than_zero <- function(x) {
    return(threshold(x, condition = 0, comparator = ">"))
}

threshold_greater_than_one <- function(x) {
    return(threshold(x, condition = 1, comparator = ">"))
}

is_NA <- function(x) {
    ifelse(is.na(x) || is.nan(x), return(0), return(x))
}
zero_to_NA <- function(x) {
    ifelse(x != 0, return(x), return(NA))
}

# Question 1
palo <- readr::read_csv("data/uscities.csv") %>%
    st_as_sf(coords = c("lng", "lat"), crs = 4326) %>%
    filter(city == "Palo", state_name == "Iowa") %>%
    st_transform(5070)

palo_bbox <- palo %>%
    st_buffer(5000) %>%
    st_bbox() %>%
    st_as_sfc() %>%
    st_as_sf()

# Question 2.2 (src: R/scenes.R)
# To build the palo-flood-scene.csv, uncomment below:
# source("R/scenes.R")

# palo_scene <- readr::read_csv("data/palo-flood-scene.csv")[1,]$download_url %>%
#   lsat_scene_files()

# palo_scene_urls <- palo_scene %>%
#    filter(grepl(paste0('B', 1:6, ".TIF$", collapse = "|"), palo_scene$file)) %>%
#    arrange(file) %>%
#    pull(file)

# Question 2.3

# lsat_image() was not working for me due to a partition error/encryption
# in my HDD, which gave me the error "Invalid cross-device link" when
# file.rename() was called. So, I worked around this by manually
# downloading the *.TIF files via `wget` into the ~/.cache/landsat-pds/...
# directory (In Fedora).
# st <- sapply(palo_scene_urls, lsat_image)

# This results in *.TIF files still being recognized in the cache:
st <- lsat_cache_list()

# Question 2.3
bands <- stack(st) %>%
    setNames(paste0("band", 1:6))

# Question 2.4
bands_crop <- bands %>%
    crop(st_transform(palo_bbox,"+proj=utm +zone=15 +datum=WGS84 +units=m +no_defs"))

bands_crop <- setNames(bands_crop, c("Coastal", "Blue", "Green", "Red", "NIR", "SWIR 1"))

Landsat Band Attributes

The attributes of our stacked bands are given as,

  • Rows: 7,811
  • Columns: 7,681
  • Cells: 59,996,291
  • Layers: 6
  • CRS: +proj=utm +zone=15 +datum=WGS84 +units=m +no_defs
  • Resolution: 30 x 30

The attributes of our cropped bands are given as,

  • Rows: 340
  • Columns: 346
  • Cells: 117,640
  • Layers: 6
  • CRS: +proj=utm +zone=15 +datum=WGS84 +units=m +no_defs
  • Resolution: 30 x 30

Plotting Bands & Applying Stretches

RGB (4,3,2)

Traditional Color Infrared (5,4,3)

False Color, Water Focus (5,6,4)

False Color, Agriculture Focus (6,5,2)

Why Stretch?

Stretching in this case is a way of increasing clarity by normalizing the distribution of brightness across a raster. In general, these diagrams from NeonScience display the general concept behind stretching:

Darker Lighter

When stretching, there are two common methods: Linear or Histogram. Linear is the method shown in the diagrams above, where points are taken, and both the points and between areas are linearly scaled. Histogram stretching occurs conceptually by stretching the ends of the intesity (brightness/contrast) of a raster, as shown in this diagram from What-When-How.com: Histogram

Band Combinations for Delineating Surface Water Features

Here we will look at five different formulized combination of Landsat bands to highlight features from our rasters.


NDVI

Normalized Difference Vegetation Index

The NVDI uses a combination of both the NIR and Red Landsat bands with a water threshold of cells less than zero to highlight the surface water in the AOI. In our mapping, water is represented by dark blue, and vegetation is represented by red.

NDWI

Normalized Difference Water Index

The NVDWI uses a combination of both the NIR and Green Landsat bands with a water threshold of cells greater than zero to highlight the surface water in the AOI. In our mapping, water is represented by red, and vegetation is represented by blue.

MNDWI

Modified Normalized Difference Water Index

The MNDWI uses a combination of both the SWIR1 and Green Landsat bands with a water threshold of cells greater than zero to highlight the surface water in the AOI. Similar to our mapping of NVDWI, water is represented by red, while everything else is represented by blue or white.

WRI

Water Ratio Index

The WRI uses a combination of the SWIR1, NIR, Green, and Red Landsat bands with a water threshold of cells greater than one to highlight the surface water in the AOI. Similar to our mapping of NVDWI/MNDWI, water is represented by red, while everything else is represented by blue or white.

SWI

Simple Water Index

The SWI uses a combination of both the SWIR1 and Blue Landsat bands with a water threshold of cells less than five to highlight the surface water in the AOI. This index was developed in 2016 by Oupa Malahlela, and you can read bout it in his paper here. Everything is depicted as blue, where the areas of surface water are blue but visibility distinct.


Identifying Flood Zones by Clustering

Here we mask the surface water features from our above indices:

NVDI_mask <- calc(NVDI, threshold_less_than_zero) %>%
    calc(is_NA)
NDWI_mask <- calc(NDWI, threshold_greater_than_zero) %>%
    calc(is_NA)
MNDWI_mask <- calc(MNDWI, threshold_greater_than_zero) %>%
    calc(is_NA)
WRI_mask <- calc(WRI, threshold_greater_than_one) %>%
    calc(is_NA)
SWI_mask <- calc(SWI, threshold_less_than_five) %>%
    calc(is_NA)

water_features_mask <- stack(c(NVDI_mask, NDWI_mask, MNDWI_mask, WRI_mask, SWI_mask)) %>%
    setNames(c("NVDI", "NDWI", "MNDWI", "WRI", "SWI"))
plot(water_features_mask, col = colorRampPalette(c("white", "blue"))(256), axes = FALSE, box = FALSE, legend=FALSE)

# Question 5.1
set.seed(75157)

# Question 5.2
kmeans_floods <- getValues(water_features_mask) %>%
    na.omit() %>%
    scale() %>%
    kmeans(10)

Creating K-Means Clusters

k-means clustering is a method of partitioning observations into clusters based on mean. A visual depiction of the algorithm can be seen here. In our case, we will find k = 10 clusters. One thing to notice in regards to our data is that, the dimensions of the new clustered data (117,640, 5) is representative of all of the masked water features data.

# Question 5.2 (cont.)
kmeans_raster <- water_features$NVDI
values(kmeans_raster) <- kmeans_floods$cluster

# Question 5.3
most_flooded_cluster <- which.max(table(getValues(NVDI_mask), getValues(kmeans_raster)))

threshold_mfc<- function(x) {
  return(threshold(x, condition = 5, comparator = "=="))
}

most_flooded <- calc(kmeans_raster, threshold_mfc)

floods <- addLayer(most_flooded, water_features_mask) %>%
  setNames(c("k-means", "NVDI", "NDWI", "MNDWI", "WRI", "SWI"))

Visualizing Flood Data

# Question 6
knitr::kable(
  cellStats(floods, stat = "sum")*res(bands),
  col.names = c("Total Flooded Area")
)
Total Flooded Area
k.means 199740
NVDI 199950
NDWI 216330
MNDWI 358080
WRI 254040
SWI 456030

Here, we have the completed flood analysis plot:


Flood Analysis Interactive Map

floods_raster <- calc(floods, sum) %>%
    calc(zero_to_NA)

mapview(
  floods_raster,
  alpha.regions = 0.5,
  layer.name = "Flood Potential"
)

Notice here that while hovering over individual cells, there may be decimals. This is a result of the visualization of pixels in relation to resolution and opacity. In our case, we have a 30 by 30 resolution raster, and as a result of visualizing this on something of a higher resolution, we get a scaled pixel that may have a decimal.

Applying Data

Using the following video, we identify a point of interest, and we want to show if it falls into our flood areas.

In particular, the coordinates of the point we want are lon: -91.78960, lat: 42.06300, which is the circled area in this picture:

POI <- st_point(c(-91.78960, 42.06300), dim = "XY") %>%
  st_sfc(crs = 4326) %>%
  st_as_sf()

captured <- raster::extract(floods, POI)

knitr::kable(
  captured
)
k.means NVDI NDWI MNDWI WRI SWI
0 0 0 1 0 1

From the above table, we notice that only the modified normalized difference water index and simple water index contained the point we were interested in.